From c85d7ccd79fb69bc096cd19bb8f95ac9534ffc23 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 24 Dec 2023 21:49:54 +0100 Subject: SMMU: Implement backing CPU page protect/unprotect --- src/core/device_memory_manager.h | 51 +++++++++++++- src/core/device_memory_manager.inc | 82 ++++++++++++++++++++-- .../host1x/gpu_device_memory_manager.cpp | 11 +++ src/video_core/host1x/gpu_device_memory_manager.h | 3 + 4 files changed, 141 insertions(+), 6 deletions(-) diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h index 0861b792d..71b95016c 100644 --- a/src/core/device_memory_manager.h +++ b/src/core/device_memory_manager.h @@ -5,6 +5,8 @@ #include #include +#include +#include #include "common/common_types.h" #include "common/virtual_buffer.h" @@ -23,6 +25,7 @@ struct DeviceMemoryManagerAllocator; template class DeviceMemoryManager { using DeviceInterface = typename Traits::DeviceInterface; + using DeviceMethods = typename Traits::DeviceMethods; public: DeviceMemoryManager(const DeviceMemory& device_memory); @@ -35,7 +38,7 @@ public: DAddr AllocatePinned(size_t size); void Free(DAddr start, size_t size); - void Map(DAddr address, VAddr virtual_address, size_t size, size_t p_id); + void Map(DAddr address, VAddr virtual_address, size_t size, size_t process_id); void Unmap(DAddr address, size_t size); // Write / Read @@ -57,6 +60,8 @@ public: size_t RegisterProcess(Memory::Memory* memory); void UnregisterProcess(size_t id); + void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta); + private: static constexpr bool supports_pinning = Traits::supports_pinning; static constexpr size_t device_virtual_bits = Traits::device_virtual_bits; @@ -90,8 +95,52 @@ private: Common::VirtualBuffer compressed_physical_ptr; Common::VirtualBuffer compressed_device_addr; + // Process memory interfaces + std::deque id_pool; std::deque registered_processes; + + // Memory protection management + + static constexpr size_t guest_max_as_bits = 39; + static constexpr
size_t guest_as_size = 1ULL << guest_max_as_bits; + static constexpr size_t guest_mask = guest_as_size - 1ULL; + static constexpr size_t process_id_start_bit = guest_max_as_bits; + + std::pair ExtractCPUBacking(size_t page_index) { + auto content = cpu_backing_address[page_index]; + const VAddr address = content & guest_mask; + const size_t process_id = static_cast(content >> process_id_start_bit); + return std::make_pair(process_id, address); + } + + void InsertCPUBacking(size_t page_index, VAddr address, size_t process_id) { + /* Pack the process id above the guest address bits so ExtractCPUBacking can undo it. */ cpu_backing_address[page_index] = address | (process_id << process_id_start_bit); + } + + Common::VirtualBuffer cpu_backing_address; + static constexpr size_t subentries = 4; + static constexpr size_t subentries_mask = subentries - 1; + class CounterEntry final { + public: + CounterEntry() = default; + + std::atomic_uint16_t& Count(std::size_t page) { + return values[page & subentries_mask]; + } + + const std::atomic_uint16_t& Count(std::size_t page) const { + return values[page & subentries_mask]; + } + + private: + std::array values{}; + }; + static_assert(sizeof(CounterEntry) == subentries * sizeof(u16), "CounterEntry should be 8 bytes!"); + + static constexpr size_t num_counter_entries = (1ULL << (device_virtual_bits - page_bits)) / subentries; + using CachedPages = std::array; + std::unique_ptr cached_pages; }; } // namespace Core \ No newline at end of file diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc index 1f52b92d5..77410f72f 100644 --- a/src/core/device_memory_manager.inc +++ b/src/core/device_memory_manager.inc @@ -2,12 +2,15 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include #include "common/address_space.h" #include "common/address_space.inc" #include "common/alignment.h" +#include "common/assert.h" +#include "common/div_ceil.h" #include "common/scope_exit.h" #include "core/device_memory.h" #include "core/device_memory_manager.h" @@ -51,7 +54,11 @@ struct
DeviceMemoryManagerAllocator { } DAddr AllocatePinned(size_t size) { - return pin_allocator.Allocate(size); + if constexpr (supports_pinning) { + return pin_allocator.Allocate(size); + } else { + return DAddr{}; + } } void DoInRange(DAddr address, size_t size, auto pin_func, auto main_func) { @@ -100,6 +107,7 @@ DeviceMemoryManager::DeviceMemoryManager(const DeviceMemory& device_memo interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS), compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) { impl = std::make_unique>(); + cached_pages = std::make_unique(); } template @@ -132,14 +140,14 @@ void DeviceMemoryManager::Free(DAddr start, size_t size) { template void DeviceMemoryManager::Map(DAddr address, VAddr virtual_address, size_t size, - size_t p_id) { - Core::Memory::Memory* process_memory = registered_processes[p_id]; + size_t process_id) { + Core::Memory::Memory* process_memory = registered_processes[process_id]; size_t start_page_d = address >> Memory::YUZU_PAGEBITS; size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; std::atomic_thread_fence(std::memory_order_acquire); for (size_t i = 0; i < num_pages; i++) { - auto* ptr = process_memory->GetPointer( - Common::ProcessAddress(virtual_address + i * Memory::YUZU_PAGESIZE)); + const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE; + auto* ptr = process_memory->GetPointer(Common::ProcessAddress(new_vaddress)); if (ptr == nullptr) [[unlikely]] { compressed_physical_ptr[start_page_d + i] = 0; continue; @@ -147,6 +155,7 @@ void DeviceMemoryManager::Map(DAddr address, VAddr virtual_address, size auto phys_addr = static_cast(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U; compressed_physical_ptr[start_page_d + i] = phys_addr; compressed_device_addr[phys_addr - 1U] = static_cast(start_page_d + i); + InsertCPUBacking(start_page_d + i, new_vaddress, process_id); } 
std::atomic_thread_fence(std::memory_order_release); } @@ -159,6 +168,7 @@ void DeviceMemoryManager::Unmap(DAddr address, size_t size) { for (size_t i = 0; i < num_pages; i++) { auto phys_addr = compressed_physical_ptr[start_page_d + i]; compressed_physical_ptr[start_page_d + i] = 0; + cpu_backing_address[start_page_d + i] = 0; if (phys_addr != 0) { compressed_device_addr[phys_addr - 1] = 0; } @@ -301,4 +311,66 @@ void DeviceMemoryManager::UnregisterProcess(size_t id) { id_pool.push_front(id); } +template +void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) { + u64 uncache_begin = 0; + u64 cache_begin = 0; + u64 uncache_bytes = 0; + u64 cache_bytes = 0; + auto* MarkRegionCaching = &DeviceMemoryManager::DeviceMethods::MarkRegionCaching; + + std::atomic_thread_fence(std::memory_order_acquire); + const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE); + size_t page = addr >> Memory::YUZU_PAGEBITS; + auto [process_id, base_vaddress] = ExtractCPUBacking(page); + size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS; + auto* memory_interface = registered_processes[process_id]; + for (; page != page_end; ++page) { + std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page); + + if (delta > 0) { + ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits::max(), + "Count may overflow!"); + } else if (delta < 0) { + ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); + } else { + ASSERT_MSG(false, "Delta must be non-zero!"); + } + + // Adds or subtracts 1, as count is an unsigned 16-bit value + count.fetch_add(static_cast(delta), std::memory_order_release); + + // Assume delta is either -1 or 1 + if (count.load(std::memory_order::relaxed) == 0) { + if (uncache_bytes == 0) { + uncache_begin = vpage; + } + uncache_bytes += Memory::YUZU_PAGESIZE; + } else if (uncache_bytes > 0) { + MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS,
uncache_bytes, false); + uncache_bytes = 0; + } + if (count.load(std::memory_order::relaxed) == 1 && delta > 0) { + if (cache_bytes == 0) { + cache_begin = vpage; + } + cache_bytes += Memory::YUZU_PAGESIZE; + } else if (cache_bytes > 0) { + MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, + true); + cache_bytes = 0; + } + vpage++; + } + if (uncache_bytes > 0) { + MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, + false); + } + if (cache_bytes > 0) { + MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, + true); + } +} + } // namespace Core \ No newline at end of file diff --git a/src/video_core/host1x/gpu_device_memory_manager.cpp b/src/video_core/host1x/gpu_device_memory_manager.cpp index 2ca445081..668c2f08b 100644 --- a/src/video_core/host1x/gpu_device_memory_manager.cpp +++ b/src/video_core/host1x/gpu_device_memory_manager.cpp @@ -5,6 +5,17 @@ #include "video_core/host1x/gpu_device_memory_manager.h" #include "video_core/rasterizer_interface.h" +namespace Tegra { + +struct MaxwellDeviceMethods { + static inline void MarkRegionCaching(Core::Memory::Memory* interface, VAddr address, + size_t size, bool caching) { + interface->RasterizerMarkRegionCached(address, size, caching); + } +}; + +} // namespace Tegra + template struct Core::DeviceMemoryManagerAllocator; template class Core::DeviceMemoryManager; diff --git a/src/video_core/host1x/gpu_device_memory_manager.h b/src/video_core/host1x/gpu_device_memory_manager.h index 30ad52017..2fb77605e 100644 --- a/src/video_core/host1x/gpu_device_memory_manager.h +++ b/src/video_core/host1x/gpu_device_memory_manager.h @@ -9,10 +9,13 @@ class RasterizerInterface; namespace Tegra { +struct MaxwellDeviceMethods; + struct MaxwellDeviceTraits { static constexpr bool supports_pinning = true; static constexpr size_t device_virtual_bits = 34; using DeviceInterface = typename VideoCore::RasterizerInterface; + using 
DeviceMethods = MaxwellDeviceMethods; }; using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager; -- cgit v1.2.3